{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import featuretools as ft\n",
    "from featuretools.primitives import IsFederalHoliday, DistanceToHoliday\n",
    "from woodwork.logical_types import Categorical"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>invoice</th>\n",
       "      <th>invoice_date</th>\n",
       "      <th>stock_code</th>\n",
       "      <th>description</th>\n",
       "      <th>quantity</th>\n",
       "      <th>price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>13085.0</td>\n",
       "      <td>489434</td>\n",
       "      <td>2009-12-01 07:45:00</td>\n",
       "      <td>85048</td>\n",
       "      <td>15CM CHRISTMAS GLASS BALL 20 LIGHTS</td>\n",
       "      <td>12</td>\n",
       "      <td>6.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>13085.0</td>\n",
       "      <td>489434</td>\n",
       "      <td>2009-12-01 07:45:00</td>\n",
       "      <td>79323P</td>\n",
       "      <td>PINK CHERRY LIGHTS</td>\n",
       "      <td>12</td>\n",
       "      <td>6.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>13085.0</td>\n",
       "      <td>489434</td>\n",
       "      <td>2009-12-01 07:45:00</td>\n",
       "      <td>79323W</td>\n",
       "      <td>WHITE CHERRY LIGHTS</td>\n",
       "      <td>12</td>\n",
       "      <td>6.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>13085.0</td>\n",
       "      <td>489434</td>\n",
       "      <td>2009-12-01 07:45:00</td>\n",
       "      <td>22041</td>\n",
       "      <td>RECORD FRAME 7\" SINGLE SIZE</td>\n",
       "      <td>48</td>\n",
       "      <td>2.10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>13085.0</td>\n",
       "      <td>489434</td>\n",
       "      <td>2009-12-01 07:45:00</td>\n",
       "      <td>21232</td>\n",
       "      <td>STRAWBERRY CERAMIC TRINKET BOX</td>\n",
       "      <td>24</td>\n",
       "      <td>1.25</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   customer_id invoice        invoice_date stock_code  \\\n",
       "0      13085.0  489434 2009-12-01 07:45:00      85048   \n",
       "1      13085.0  489434 2009-12-01 07:45:00     79323P   \n",
       "2      13085.0  489434 2009-12-01 07:45:00     79323W   \n",
       "3      13085.0  489434 2009-12-01 07:45:00      22041   \n",
       "4      13085.0  489434 2009-12-01 07:45:00      21232   \n",
       "\n",
       "                           description  quantity  price  \n",
       "0  15CM CHRISTMAS GLASS BALL 20 LIGHTS        12   6.95  \n",
       "1                   PINK CHERRY LIGHTS        12   6.75  \n",
       "2                  WHITE CHERRY LIGHTS        12   6.75  \n",
       "3         RECORD FRAME 7\" SINGLE SIZE         48   2.10  \n",
       "4       STRAWBERRY CERAMIC TRINKET BOX        24   1.25  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# load data\n",
    "\n",
    "df = pd.read_csv(\"retail.csv\", parse_dates=[\"invoice_date\"])\n",
    "\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# create an entity set\n",
    "\n",
    "es = ft.EntitySet(id=\"data\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\woodwork\\type_sys\\utils.py:40: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
      "  pd.to_datetime(\n",
      "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\woodwork\\type_sys\\utils.py:40: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
      "  pd.to_datetime(\n",
      "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\woodwork\\type_sys\\utils.py:40: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
      "  pd.to_datetime(\n",
      "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\woodwork\\type_sys\\utils.py:40: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
      "  pd.to_datetime(\n",
      "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\woodwork\\type_sys\\utils.py:40: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
      "  pd.to_datetime(\n",
      "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\woodwork\\type_sys\\utils.py:40: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
      "  pd.to_datetime(\n"
     ]
    }
   ],
   "source": [
    "# Add the data to the entity set\n",
    "\n",
    "es = es.add_dataframe(\n",
    "    dataframe=df,              # the dataframe with the data\n",
    "    dataframe_name=\"data\",     # unique name to associate with this dataframe\n",
    "    index=\"rows\",              # column name to index the items\n",
    "    make_index=True,           # if true, create a new column with unique values\n",
    "    time_index=\"invoice_date\", # column containing time data\n",
    "    logical_types={\n",
    "        \"customer_id\": Categorical, # the id is numerical, but should be handled as categorical\n",
    "    },\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Entityset: data\n",
       "  DataFrames:\n",
       "    data [Rows: 741301, Columns: 8]\n",
       "    invoices [Rows: 40505, Columns: 3]\n",
       "  Relationships:\n",
       "    data.invoice -> invoices.invoice"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Create a new dataframe with invoices\n",
    "# indicating its relationship to the main data\n",
    "\n",
    "es.normalize_dataframe(\n",
    "    base_dataframe_name=\"data\",     # Dataframe name from which to split.\n",
    "    new_dataframe_name=\"invoices\",  # Name of the new dataframe.\n",
    "    index=\"invoice\",                # relationship will be created across this column.\n",
    "    copy_columns=[\"customer_id\"],   # columns to copy to the new dataframe (they are also kept in the base dataframe).\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>invoice</th>\n",
       "      <th>customer_id</th>\n",
       "      <th>first_data_time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>489434</th>\n",
       "      <td>489434</td>\n",
       "      <td>13085.0</td>\n",
       "      <td>2009-12-01 07:45:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>489435</th>\n",
       "      <td>489435</td>\n",
       "      <td>13085.0</td>\n",
       "      <td>2009-12-01 07:46:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>489436</th>\n",
       "      <td>489436</td>\n",
       "      <td>13078.0</td>\n",
       "      <td>2009-12-01 09:06:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>489437</th>\n",
       "      <td>489437</td>\n",
       "      <td>15362.0</td>\n",
       "      <td>2009-12-01 09:08:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>489438</th>\n",
       "      <td>489438</td>\n",
       "      <td>18102.0</td>\n",
       "      <td>2009-12-01 09:24:00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       invoice customer_id     first_data_time\n",
       "489434  489434     13085.0 2009-12-01 07:45:00\n",
       "489435  489435     13085.0 2009-12-01 07:46:00\n",
       "489436  489436     13078.0 2009-12-01 09:06:00\n",
       "489437  489437     15362.0 2009-12-01 09:08:00\n",
       "489438  489438     18102.0 2009-12-01 09:24:00"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "es[\"invoices\"].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# I want to know if the day was a bank holiday\n",
    "\n",
    "is_bank_hol = IsFederalHoliday(country=\"UK\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Boxing Day',\n",
       " 'Boxing Day (observed)',\n",
       " 'Christmas Day',\n",
       " 'Christmas Day (observed)',\n",
       " 'Coronation of Charles III',\n",
       " 'Diamond Jubilee of Elizabeth II',\n",
       " 'Golden Jubilee of Elizabeth II',\n",
       " 'Good Friday',\n",
       " 'May Day',\n",
       " 'Millennium Celebrations',\n",
       " \"New Year's Day\",\n",
       " \"New Year's Day (observed)\",\n",
       " 'Platinum Jubilee of Elizabeth II',\n",
       " 'Silver Jubilee of Elizabeth II',\n",
       " 'Spring Bank Holiday',\n",
       " 'State Funeral of Queen Elizabeth II',\n",
       " 'Wedding of Charles and Diana',\n",
       " 'Wedding of William and Catherine']"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# List the distinct holiday names known to the UK holiday primitive.\n",
    "# NOTE(review): `holidayUtil.federal_holidays` is read straight off the\n",
    "# primitive object; presumably it maps dates to holiday names - confirm\n",
    "# against the installed featuretools version.\n",
    "hols = is_bank_hol.holidayUtil.federal_holidays.values()\n",
    "\n",
    "# De-duplicate, then sort: a bare set has no stable order between kernel\n",
    "# restarts, so sorting keeps the displayed list reproducible.\n",
    "available_hols = sorted(set(hols))\n",
    "\n",
    "available_hols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# I want to know how close we are to boxing day\n",
    "\n",
    "days_to_boxing = DistanceToHoliday(holiday=\"Boxing Day\", country=\"UK\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "date_primitives = [\n",
    "    \"day\", \"year\", \"month\", \"weekday\",\n",
    "    \"days_in_month\", \"part_of_day\",\n",
    "    \"hour\", \"minute\",\n",
    "    is_bank_hol,\n",
    "    days_to_boxing\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<Feature: customer_id>,\n",
       " <Feature: DAY(first_data_time)>,\n",
       " <Feature: DAYS_IN_MONTH(first_data_time)>,\n",
       " <Feature: DISTANCE_TO_HOLIDAY(first_data_time, holiday=Boxing Day, country=UK)>,\n",
       " <Feature: HOUR(first_data_time)>,\n",
       " <Feature: IS_FEDERAL_HOLIDAY(first_data_time, country=UK)>,\n",
       " <Feature: MINUTE(first_data_time)>,\n",
       " <Feature: MONTH(first_data_time)>,\n",
       " <Feature: PART_OF_DAY(first_data_time)>,\n",
       " <Feature: WEEKDAY(first_data_time)>,\n",
       " <Feature: YEAR(first_data_time)>]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Create datetime features\n",
    "\n",
    "feature_matrix, feature_defs = ft.dfs(\n",
    "    entityset=es,                       # the entity set\n",
    "    target_dataframe_name=\"invoices\",   # the dataframe for which to create the features\n",
    "    agg_primitives=[],                  # empty list, so that the default aggregation primitives are not applied\n",
    "    trans_primitives=date_primitives,   # the date features to extract\n",
    ")\n",
    "\n",
    "# display name of created features\n",
    "feature_defs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>DAY(first_data_time)</th>\n",
       "      <th>DAYS_IN_MONTH(first_data_time)</th>\n",
       "      <th>DISTANCE_TO_HOLIDAY(first_data_time, holiday=Boxing Day, country=UK)</th>\n",
       "      <th>HOUR(first_data_time)</th>\n",
       "      <th>IS_FEDERAL_HOLIDAY(first_data_time, country=UK)</th>\n",
       "      <th>MINUTE(first_data_time)</th>\n",
       "      <th>MONTH(first_data_time)</th>\n",
       "      <th>PART_OF_DAY(first_data_time)</th>\n",
       "      <th>WEEKDAY(first_data_time)</th>\n",
       "      <th>YEAR(first_data_time)</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>invoice</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>489434</th>\n",
       "      <td>13085.0</td>\n",
       "      <td>1</td>\n",
       "      <td>31</td>\n",
       "      <td>25.0</td>\n",
       "      <td>7</td>\n",
       "      <td>False</td>\n",
       "      <td>45</td>\n",
       "      <td>12</td>\n",
       "      <td>early morning</td>\n",
       "      <td>1</td>\n",
       "      <td>2009</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>489435</th>\n",
       "      <td>13085.0</td>\n",
       "      <td>1</td>\n",
       "      <td>31</td>\n",
       "      <td>25.0</td>\n",
       "      <td>7</td>\n",
       "      <td>False</td>\n",
       "      <td>46</td>\n",
       "      <td>12</td>\n",
       "      <td>early morning</td>\n",
       "      <td>1</td>\n",
       "      <td>2009</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>489436</th>\n",
       "      <td>13078.0</td>\n",
       "      <td>1</td>\n",
       "      <td>31</td>\n",
       "      <td>25.0</td>\n",
       "      <td>9</td>\n",
       "      <td>False</td>\n",
       "      <td>6</td>\n",
       "      <td>12</td>\n",
       "      <td>late morning</td>\n",
       "      <td>1</td>\n",
       "      <td>2009</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>489437</th>\n",
       "      <td>15362.0</td>\n",
       "      <td>1</td>\n",
       "      <td>31</td>\n",
       "      <td>25.0</td>\n",
       "      <td>9</td>\n",
       "      <td>False</td>\n",
       "      <td>8</td>\n",
       "      <td>12</td>\n",
       "      <td>late morning</td>\n",
       "      <td>1</td>\n",
       "      <td>2009</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>489438</th>\n",
       "      <td>18102.0</td>\n",
       "      <td>1</td>\n",
       "      <td>31</td>\n",
       "      <td>25.0</td>\n",
       "      <td>9</td>\n",
       "      <td>False</td>\n",
       "      <td>24</td>\n",
       "      <td>12</td>\n",
       "      <td>late morning</td>\n",
       "      <td>1</td>\n",
       "      <td>2009</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        customer_id DAY(first_data_time) DAYS_IN_MONTH(first_data_time)  \\\n",
       "invoice                                                                   \n",
       "489434      13085.0                    1                             31   \n",
       "489435      13085.0                    1                             31   \n",
       "489436      13078.0                    1                             31   \n",
       "489437      15362.0                    1                             31   \n",
       "489438      18102.0                    1                             31   \n",
       "\n",
       "         DISTANCE_TO_HOLIDAY(first_data_time, holiday=Boxing Day, country=UK)  \\\n",
       "invoice                                                                         \n",
       "489434                                                25.0                      \n",
       "489435                                                25.0                      \n",
       "489436                                                25.0                      \n",
       "489437                                                25.0                      \n",
       "489438                                                25.0                      \n",
       "\n",
       "        HOUR(first_data_time)  \\\n",
       "invoice                         \n",
       "489434                      7   \n",
       "489435                      7   \n",
       "489436                      9   \n",
       "489437                      9   \n",
       "489438                      9   \n",
       "\n",
       "         IS_FEDERAL_HOLIDAY(first_data_time, country=UK)  \\\n",
       "invoice                                                    \n",
       "489434                                             False   \n",
       "489435                                             False   \n",
       "489436                                             False   \n",
       "489437                                             False   \n",
       "489438                                             False   \n",
       "\n",
       "        MINUTE(first_data_time) MONTH(first_data_time)  \\\n",
       "invoice                                                  \n",
       "489434                       45                     12   \n",
       "489435                       46                     12   \n",
       "489436                        6                     12   \n",
       "489437                        8                     12   \n",
       "489438                       24                     12   \n",
       "\n",
       "        PART_OF_DAY(first_data_time) WEEKDAY(first_data_time)  \\\n",
       "invoice                                                         \n",
       "489434                 early morning                        1   \n",
       "489435                 early morning                        1   \n",
       "489436                  late morning                        1   \n",
       "489437                  late morning                        1   \n",
       "489438                  late morning                        1   \n",
       "\n",
       "        YEAR(first_data_time)  \n",
       "invoice                        \n",
       "489434                   2009  \n",
       "489435                   2009  \n",
       "489436                   2009  \n",
       "489437                   2009  \n",
       "489438                   2009  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# dataframe with the new features\n",
    "\n",
    "feature_matrix.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>DISTANCE_TO_HOLIDAY(first_data_time, holiday=Boxing Day, country=UK)</th>\n",
       "      <th>HOUR(first_data_time)</th>\n",
       "      <th>IS_FEDERAL_HOLIDAY(first_data_time, country=UK)</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>invoice</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>489434</th>\n",
       "      <td>25.0</td>\n",
       "      <td>7</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>489435</th>\n",
       "      <td>25.0</td>\n",
       "      <td>7</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>489436</th>\n",
       "      <td>25.0</td>\n",
       "      <td>9</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>489437</th>\n",
       "      <td>25.0</td>\n",
       "      <td>9</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>489438</th>\n",
       "      <td>25.0</td>\n",
       "      <td>9</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         DISTANCE_TO_HOLIDAY(first_data_time, holiday=Boxing Day, country=UK)  \\\n",
       "invoice                                                                         \n",
       "489434                                                25.0                      \n",
       "489435                                                25.0                      \n",
       "489436                                                25.0                      \n",
       "489437                                                25.0                      \n",
       "489438                                                25.0                      \n",
       "\n",
       "        HOUR(first_data_time)  IS_FEDERAL_HOLIDAY(first_data_time, country=UK)  \n",
       "invoice                                                                         \n",
       "489434                      7                                            False  \n",
       "489435                      7                                            False  \n",
       "489436                      9                                            False  \n",
       "489437                      9                                            False  \n",
       "489438                      9                                            False  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "columns = [\n",
    "    \"DISTANCE_TO_HOLIDAY(first_data_time, holiday=Boxing Day, country=UK)\",\n",
    "    \"HOUR(first_data_time)\",\n",
    "    \"IS_FEDERAL_HOLIDAY(first_data_time, country=UK)\",\n",
    "]\n",
    "\n",
    "feature_matrix[columns].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "fsml",
   "language": "python",
   "name": "fsml"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.5"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "220.467px"
   },
   "toc_section_display": "block",
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
